package jupitermouse.site.project.dataclean

import org.apache.spark.sql.{SaveMode, SparkSession}

/**
 * Spark job that reads a raw access log as text, converts every line with
 * `AccessConvertUtil.parseLog`, and writes the resulting DataFrame out as JSON.
 *
 * The job runs on a local master with two threads (`local[2]`).
 */
object SparkStatCleanJob2 {

  // Paths used when none are supplied on the command line.
  private val DefaultInputPath = "file:///E:\\workroom\\learn\\spark\\access.log"
  private val DefaultOutputPath = "file:///E:\\workroom\\learn\\spark\\access.json"

  /**
   * Job entry point.
   *
   * @param args optional overrides: `args(0)` is the input log path and
   *             `args(1)` is the output path. Any missing argument falls back
   *             to the built-in default, so running with no arguments behaves
   *             exactly as before.
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.lift(0).getOrElse(DefaultInputPath)
    val outputPath = args.lift(1).getOrElse(DefaultOutputPath)

    val spark = SparkSession.builder().appName("TopNStatJob")
      .master("local[2]").getOrCreate()

    // Always stop the session, even when reading, parsing or writing fails;
    // otherwise the local Spark context is left running after an error.
    try {
      val accessRDD = spark.sparkContext.textFile(inputPath)

      // RDD ==> DF
      // NOTE(review): presumably parseLog yields a Row matching
      // AccessConvertUtil.struct — confirm against AccessConvertUtil.
      val accessDF = spark.createDataFrame(
        accessRDD.map(line => AccessConvertUtil.parseLog(line)),
        AccessConvertUtil.struct)

      // No save mode is set, so Spark's default applies and the write fails
      // if outputPath already exists.
      accessDF.write.json(outputPath)

      /*
      Alternative output: a single parquet file set, partitioned by "day",
      overwriting any previous result.

      accessDF.coalesce(1).write
        .format("parquet").mode(SaveMode.Overwrite)
        .partitionBy("day").save(outputPath)
      */
    } finally {
      spark.stop()
    }
  }
}
